<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
<meta name="viewport" content="width=device-width">
<meta name="theme-color" content="#222"><meta name="generator" content="Hexo 6.3.0">

  <link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png">
  <link rel="mask-icon" href="/images/logo.svg" color="#222">

<link rel="stylesheet" href="/css/main.css">



<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.2.1/css/all.min.css" integrity="sha256-Z1K5uhUaJXA7Ll0XrZ/0JhX4lAtZFpT6jkKrEDT0drU=" crossorigin="anonymous">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/animate.css/3.1.1/animate.min.css" integrity="sha256-PR7ttpcvz8qrF57fur/yAx1qXMFJeJFiA6pSzWi0OIE=" crossorigin="anonymous">

<script class="next-config" data-name="main" type="application/json">{"hostname":"example.com","root":"/","images":"/images","scheme":"Muse","darkmode":false,"version":"8.14.1","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12},"copycode":{"enable":false,"style":null},"bookmark":{"enable":false,"color":"#222","save":"auto"},"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"stickytabs":false,"motion":{"enable":true,"async":false,"transition":{"menu_item":"fadeInDown","post_block":"fadeIn","post_header":"fadeInDown","post_body":"fadeInDown","coll_header":"fadeInLeft","sidebar":"fadeInUp"}},"prism":false,"i18n":{"placeholder":"搜索...","empty":"没有找到任何搜索结果：${query}","hits_time":"找到 ${hits} 个搜索结果（用时 ${time} 毫秒）","hits":"找到 ${hits} 个搜索结果"},"path":"/search.xml","localsearch":{"enable":true,"trigger":"auto","top_n_per_article":-1,"unescape":false,"preload":false}}</script><script src="/js/config.js"></script>

    <meta name="description" content="正向索引：存储每个文档的单词的列表。 正向索引的查询往往满足每个文档有序频繁的全文查询。  倒排索引 反向索引数据结构是典型的搜索引擎检索算法重要的部分。  倒排索引（英语：Inverted index），也常被称为反向索引、置入档案或反向档案，是一种索引方法，被用来存储在全文搜索下某个单词在一个文档或者一组文档中的存储位置的映射。它是文档检索系统中最常用的数据结构。 有两种不同的反向索引形式：">
<meta property="og:type" content="article">
<meta property="og:title" content="倒排索引">
<meta property="og:url" content="http://example.com/2022/08/12/%E5%80%92%E6%8E%92%E7%B4%A2%E5%BC%95/index.html">
<meta property="og:site_name" content="JsyBlog">
<meta property="og:description" content="正向索引：存储每个文档的单词的列表。 正向索引的查询往往满足每个文档有序频繁的全文查询。  倒排索引 反向索引数据结构是典型的搜索引擎检索算法重要的部分。  倒排索引（英语：Inverted index），也常被称为反向索引、置入档案或反向档案，是一种索引方法，被用来存储在全文搜索下某个单词在一个文档或者一组文档中的存储位置的映射。它是文档检索系统中最常用的数据结构。 有两种不同的反向索引形式：">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="https://wikimedia.org/api/rest_v1/media/math/render/svg/1ab3d9202f18a678affe9a339511bef0a7b8b110">
<meta property="og:image" content="https://wikimedia.org/api/rest_v1/media/math/render/svg/1ab3d9202f18a678affe9a339511bef0a7b8b110">
<meta property="og:image" content="https://wikimedia.org/api/rest_v1/media/math/render/svg/57ac5840245616f79874f4635b3bcb2e3da344dd">
<meta property="og:image" content="https://wikimedia.org/api/rest_v1/media/math/render/svg/57ac5840245616f79874f4635b3bcb2e3da344dd">
<meta property="og:image" content="https://wikimedia.org/api/rest_v1/media/math/render/svg/5713cf5634b3846b4d68c3383b65db289511d8bf">
<meta property="og:image" content="https://wikimedia.org/api/rest_v1/media/math/render/svg/5713cf5634b3846b4d68c3383b65db289511d8bf">
<meta property="article:published_time" content="2022-08-12T02:23:03.000Z">
<meta property="article:modified_time" content="2022-09-02T14:44:06.080Z">
<meta property="article:author" content="SongyangJi">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://wikimedia.org/api/rest_v1/media/math/render/svg/1ab3d9202f18a678affe9a339511bef0a7b8b110">


<link rel="canonical" href="http://example.com/2022/08/12/%E5%80%92%E6%8E%92%E7%B4%A2%E5%BC%95/">



<script class="next-config" data-name="page" type="application/json">{"sidebar":"","isHome":false,"isPost":true,"lang":"zh-CN","comments":true,"permalink":"http://example.com/2022/08/12/%E5%80%92%E6%8E%92%E7%B4%A2%E5%BC%95/","path":"2022/08/12/倒排索引/","title":"倒排索引"}</script>

<script class="next-config" data-name="calendar" type="application/json">""</script>
<title>倒排索引 | JsyBlog</title>
  








  <noscript>
    <link rel="stylesheet" href="/css/noscript.css">
  </noscript>
</head>

<body itemscope itemtype="http://schema.org/WebPage" class="use-motion">
  <div class="headband"></div>

  <main class="main">
    <div class="column">
      <header class="header" itemscope itemtype="http://schema.org/WPHeader"><div class="site-brand-container">
  <div class="site-nav-toggle">
    <div class="toggle" aria-label="切换导航栏" role="button">
        <span class="toggle-line"></span>
        <span class="toggle-line"></span>
        <span class="toggle-line"></span>
    </div>
  </div>

  <div class="site-meta">

    <a href="/" class="brand" rel="start">
      <i class="logo-line"></i>
      <p class="site-title">JsyBlog</p>
      <i class="logo-line"></i>
    </a>
  </div>

  <div class="site-nav-right">
    <div class="toggle popup-trigger" aria-label="搜索" role="button">
        <i class="fa fa-search fa-fw fa-lg"></i>
    </div>
  </div>
</div>



<nav class="site-nav">
  <ul class="main-menu menu"><li class="menu-item menu-item-home"><a href="/" rel="section"><i class="fa fa-home fa-fw"></i>首页</a></li><li class="menu-item menu-item-tags"><a href="/tags/" rel="section"><i class="fa fa-tags fa-fw"></i>标签</a></li><li class="menu-item menu-item-categories"><a href="/categories/" rel="section"><i class="fa fa-th fa-fw"></i>分类</a></li><li class="menu-item menu-item-archives"><a href="/archives/" rel="section"><i class="fa fa-archive fa-fw"></i>归档</a></li>
      <li class="menu-item menu-item-search">
        <a role="button" class="popup-trigger"><i class="fa fa-search fa-fw"></i>搜索
        </a>
      </li>
  </ul>
</nav>



  <div class="search-pop-overlay">
    <div class="popup search-popup"><div class="search-header">
  <span class="search-icon">
    <i class="fa fa-search"></i>
  </span>
  <div class="search-input-container">
    <input autocomplete="off" autocapitalize="off" maxlength="80"
           placeholder="搜索..." spellcheck="false"
           type="search" class="search-input">
  </div>
  <span class="popup-btn-close" role="button">
    <i class="fa fa-times-circle"></i>
  </span>
</div>
<div class="search-result-container no-result">
  <div class="search-result-icon">
    <i class="fa fa-spinner fa-pulse fa-5x"></i>
  </div>
</div>

    </div>
  </div>

</header>
        
  
  <aside class="sidebar">

    <div class="sidebar-inner sidebar-nav-active sidebar-toc-active">
      <ul class="sidebar-nav">
        <li class="sidebar-nav-toc">
          文章目录
        </li>
        <li class="sidebar-nav-overview">
          站点概览
        </li>
      </ul>

      <div class="sidebar-panel-container">
        <!--noindex-->
        <div class="post-toc-wrap sidebar-panel">
            <div class="post-toc animated"><ol class="nav"><li class="nav-item nav-level-1"><a class="nav-link" href="#%E5%80%92%E6%8E%92%E7%B4%A2%E5%BC%95"><span class="nav-number">1.</span> <span class="nav-text">倒排索引</span></a><ol class="nav-child"><li class="nav-item nav-level-2"><a class="nav-link" href="#%E5%8F%8D%E5%90%91%E6%A1%A3%E6%A1%88%E7%B4%A2%E5%BC%95"><span class="nav-number">1.1.</span> <span class="nav-text">反向档案索引</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#%E5%AE%8C%E5%85%A8%E5%8F%8D%E5%90%91%E7%B4%A2%E5%BC%95"><span class="nav-number">1.2.</span> <span class="nav-text">完全反向索引</span></a></li></ol></li></ol></div>
        </div>
        <!--/noindex-->

        <div class="site-overview-wrap sidebar-panel">
          <div class="site-author animated" itemprop="author" itemscope itemtype="http://schema.org/Person">
  <p class="site-author-name" itemprop="name">SongyangJi</p>
  <div class="site-description" itemprop="description"></div>
</div>
<div class="site-state-wrap animated">
  <nav class="site-state">
      <div class="site-state-item site-state-posts">
        <a href="/archives/">
          <span class="site-state-item-count">251</span>
          <span class="site-state-item-name">日志</span>
        </a>
      </div>
      <div class="site-state-item site-state-categories">
          <a href="/categories/">
        <span class="site-state-item-count">45</span>
        <span class="site-state-item-name">分类</span></a>
      </div>
      <div class="site-state-item site-state-tags">
          <a href="/tags/">
        <span class="site-state-item-count">109</span>
        <span class="site-state-item-name">标签</span></a>
      </div>
  </nav>
</div>

        </div>
      </div>
    </div>

    
  </aside>


    </div>

    <div class="main-inner post posts-expand">


  


<div class="post-block">
  
  

  <article itemscope itemtype="http://schema.org/Article" class="post-content" lang="zh-CN">
    <link itemprop="mainEntityOfPage" href="http://example.com/2022/08/12/%E5%80%92%E6%8E%92%E7%B4%A2%E5%BC%95/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="image" content="/images/avatar.gif">
      <meta itemprop="name" content="SongyangJi">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="JsyBlog">
      <meta itemprop="description" content="">
    </span>

    <span hidden itemprop="post" itemscope itemtype="http://schema.org/CreativeWork">
      <meta itemprop="name" content="倒排索引 | JsyBlog">
      <meta itemprop="description" content="">
    </span>
      <header class="post-header">
        <h1 class="post-title" itemprop="name headline">
          倒排索引
        </h1>

        <div class="post-meta-container">
          <div class="post-meta">
    <span class="post-meta-item">
      <span class="post-meta-item-icon">
        <i class="far fa-calendar"></i>
      </span>
      <span class="post-meta-item-text">发表于</span>

      <time title="创建时间：2022-08-12 10:23:03" itemprop="dateCreated datePublished" datetime="2022-08-12T10:23:03+08:00">2022-08-12</time>
    </span>
    <span class="post-meta-item">
      <span class="post-meta-item-icon">
        <i class="far fa-calendar-check"></i>
      </span>
      <span class="post-meta-item-text">更新于</span>
      <time title="修改时间：2022-09-02 22:44:06" itemprop="dateModified" datetime="2022-09-02T22:44:06+08:00">2022-09-02</time>
    </span>
    <span class="post-meta-item">
      <span class="post-meta-item-icon">
        <i class="far fa-folder"></i>
      </span>
      <span class="post-meta-item-text">分类于</span>
        <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
          <a href="/categories/%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84/" itemprop="url" rel="index"><span itemprop="name">数据结构</span></a>
        </span>
    </span>

  
</div>

        </div>
      </header>

    
    
    
    <div class="post-body" itemprop="articleBody">
        <blockquote>
<p>正向索引：存储每个文档的单词的列表。</p>
<p>正向索引的查询往往满足每个文档有序频繁的全文查询。</p>
</blockquote>
<h1 id="倒排索引"><a href="#倒排索引" class="headerlink" title="倒排索引"></a>倒排索引</h1><blockquote>
<p>反向索引数据结构是典型的<a target="_blank" rel="noopener" href="https://zh.m.wikipedia.org/wiki/%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E">搜索引擎</a><a target="_blank" rel="noopener" href="https://zh.m.wikipedia.org/wiki/%E6%AA%A2%E7%B4%A2">检索</a><a target="_blank" rel="noopener" href="https://zh.m.wikipedia.org/wiki/%E7%AE%97%E6%B3%95">算法</a>重要的部分。</p>
</blockquote>
<p><strong>倒排索引</strong>（英语：Inverted index），也常被称为<strong>反向索引</strong>、<strong>置入档案</strong>或<strong>反向档案</strong>，是一种索引方法，被<strong>用来存储在全文搜索下某个单词在一个文档或者一组文档中的存储位置的映射</strong>。它是<em>文档检索系统</em>中最常用的数据结构。</p>
<p>有两种不同的反向索引形式：</p>
<ul>
<li>一条记录的水平反向索引（或者<strong>反向档案索引</strong>）包含每个引用单词的文档的[列表]。</li>
<li>一个单词的水平反向索引（或者<strong>完全反向索引</strong>）又包含每个单词在一个文档中的位置。</li>
</ul>
<p>后者的形式提供了更多的兼容性（比如<em>短语搜索</em>），但是需要更多的时间和空间来创建。</p>
<p>举例，下面是要被索引的文本：</p>
<ul><li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle T_{0}=}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <msub>
          <mi>T</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mn>0</mn>
          </mrow>
        </msub>
        <mo>=</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle T_{0}=}</annotation>
  </semantics></math></span><noscript><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1ab3d9202f18a678affe9a339511bef0a7b8b110" class="mwe-math-fallback-image-inline" aria-hidden="true" style="vertical-align: -0.671ex; width:4.865ex; height:2.509ex;" alt="{\displaystyle T_{0}=}"></noscript><img width="0" height="0" class="mwe-math-fallback-image-inline image-lazy-loaded" alt="{\displaystyle T_{0}=}" usemap="undefined" src="https://wikimedia.org/api/rest_v1/media/math/render/svg/1ab3d9202f18a678affe9a339511bef0a7b8b110" srcset="" style="width: 4.865ex; height: 2.509ex; vertical-align: -0.671ex;"></span><code>0. "it is what it is"</code></li>
<li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle T_{1}=}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <msub>
          <mi>T</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mn>1</mn>
          </mrow>
        </msub>
        <mo>=</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle T_{1}=}</annotation>
  </semantics></math></span><noscript><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/57ac5840245616f79874f4635b3bcb2e3da344dd" class="mwe-math-fallback-image-inline" aria-hidden="true" style="vertical-align: -0.671ex; width:4.865ex; height:2.509ex;" alt="{\displaystyle T_{1}=}"></noscript><img width="0" height="0" class="mwe-math-fallback-image-inline image-lazy-loaded" alt="{\displaystyle T_{1}=}" usemap="undefined" src="https://wikimedia.org/api/rest_v1/media/math/render/svg/57ac5840245616f79874f4635b3bcb2e3da344dd" srcset="" style="width: 4.865ex; height: 2.509ex; vertical-align: -0.671ex;"></span><code>1. "what is it"</code></li>
<li><span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML" alttext="{\displaystyle T_{2}=}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <msub>
          <mi>T</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mn>2</mn>
          </mrow>
        </msub>
        <mo>=</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle T_{2}=}</annotation>
  </semantics></math></span><noscript><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5713cf5634b3846b4d68c3383b65db289511d8bf" class="mwe-math-fallback-image-inline" aria-hidden="true" style="vertical-align: -0.671ex; width:4.865ex; height:2.509ex;" alt="{\displaystyle T_{2}=}"></noscript><img width="0" height="0" class="mwe-math-fallback-image-inline image-lazy-loaded" alt="{\displaystyle T_{2}=}" usemap="undefined" src="https://wikimedia.org/api/rest_v1/media/math/render/svg/5713cf5634b3846b4d68c3383b65db289511d8bf" srcset="" style="width: 4.865ex; height: 2.509ex; vertical-align: -0.671ex;"></span><code>2. "it is a banana"</code></li></ul>



<h2 id="反向档案索引"><a href="#反向档案索引" class="headerlink" title="反向档案索引"></a>反向档案索引</h2><p>我们就能得到下面的反向文件索引：</p>
<pre> "a":      {2}
 "banana": {2}
 "is":     {0, 1, 2}
 "it":     {0, 1, 2}
 "what":   {0, 1}
</pre>
<p>如果要检索 ‘what’、’is’、’it’将对应这个集合：<br>$$<br>{0,1}\cap{0,1,2}\cap{0,1,2}&#x3D;{0,1}<br>$$</p>
<h2 id="完全反向索引"><a href="#完全反向索引" class="headerlink" title="完全反向索引"></a>完全反向索引</h2><p>对相同的文字，我们得到后面这些完全反向索引，由文档数量和当前查询的单词结果组成的的成对数据。 同样，文档数量和当前查询的单词结果都从零开始。所以，”banana”: {(2, 3)} 就是说 “banana”在第三个文档里 T2，而且在第三个文档的位置是第四个单词(地址为 3)。</p>
<pre>"a":      {(2, 2)}
"banana": {(2, 3)}
"is":     {(0, 1), (0, 4), <b>(1, 1)</b>, (2, 1)}
"it":     {(0, 0), (0, 3), <b>(1, 2)</b>, (2, 0)} 
"what":   {(0, 2), <b>(1, 0)</b>}
</pre>


<p>如果我们执行短语搜索<code>&quot;what is it&quot;</code> 我们得到这个短语的全部单词各自的结果所在文档为文档0和文档1。但是这个短语检索的连续的条件仅仅在文档1得到。</p>

    </div>

    
    
    

    <footer class="post-footer">

        

          <div class="post-nav">
            <div class="post-nav-item">
                <a href="/2022/07/26/JUnit%E5%92%8CMockito/" rel="prev" title="JUnit和Mockito">
                  <i class="fa fa-chevron-left"></i> JUnit和Mockito
                </a>
            </div>
            <div class="post-nav-item">
                <a href="/2022/08/12/%E9%9D%A2%E8%AF%95%E5%9C%BA%E6%99%AF%E7%AE%97%E6%B3%95%E9%A2%98/" rel="next" title="面试场景算法">
                  面试场景算法 <i class="fa fa-chevron-right"></i>
                </a>
            </div>
          </div>
    </footer>
  </article>
</div>






</div>
  </main>

  <footer class="footer">
    <div class="footer-inner">


<div class="copyright">
  &copy; 
  <span itemprop="copyrightYear">2023</span>
  <span class="with-love">
    <i class="fa fa-heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">SongyangJi</span>
</div>
  <div class="powered-by">由 <a href="https://hexo.io/" rel="noopener" target="_blank">Hexo</a> & <a href="https://theme-next.js.org/muse/" rel="noopener" target="_blank">NexT.Muse</a> 强力驱动
  </div>

    </div>
  </footer>

  
  <div class="toggle sidebar-toggle" role="button">
    <span class="toggle-line"></span>
    <span class="toggle-line"></span>
    <span class="toggle-line"></span>
  </div>
  <div class="sidebar-dimmer"></div>
  <div class="back-to-top" role="button" aria-label="返回顶部">
    <i class="fa fa-arrow-up fa-lg"></i>
    <span>0%</span>
  </div>

<noscript>
  <div class="noscript-warning">Theme NexT works best with JavaScript enabled</div>
</noscript>


  
  <script src="https://cdnjs.cloudflare.com/ajax/libs/animejs/3.2.1/anime.min.js" integrity="sha256-XL2inqUJaslATFnHdJOi9GfQ60on8Wx1C2H8DYiN1xY=" crossorigin="anonymous"></script>
<script src="/js/comments.js"></script><script src="/js/utils.js"></script><script src="/js/motion.js"></script><script src="/js/schemes/muse.js"></script><script src="/js/next-boot.js"></script>

  <script src="https://cdnjs.cloudflare.com/ajax/libs/hexo-generator-searchdb/1.4.1/search.js" integrity="sha256-1kfA5uHPf65M5cphT2dvymhkuyHPQp5A53EGZOnOLmc=" crossorigin="anonymous"></script>
<script src="/js/third-party/search/local-search.js"></script>





  





</body>
</html>
